"How does the combination of economic status (GDP per capita) and healthcare access (represented by immunization rates for Hepatitis B, Polio, and Diphtheria) affect under-five mortality rates in various climate zones within developing countries over the last decade?"¶

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# Read the dataset
df = pd.read_csv("Life_expectancy_merged.csv")
df
Out[4]:
Country Region Year Infant_deaths Under_five_deaths Adult_mortality Alcohol_consumption Hepatitis_B Measles BMI ... Population_mln Thinness_ten_nineteen_years Thinness_five_nine_years Schooling Economy_status_Developed Economy_status_Developing Life_expectancy Climate_Zone AQI Status
0 Turkiye Middle East 2015 11.1 13.0 105.8240 1.320 97 65 27.8 ... 78.53 4.9 4.8 7.8 0 1 76.5 Temperate 124.603599 Unhealthy
1 Spain European Union 2015 2.7 3.3 57.9025 10.350 97 94 26.0 ... 46.44 0.6 0.5 9.7 1 0 82.8 Mediterranean 77.987395 Moderate
2 India Asia 2007 51.5 67.9 201.0765 1.570 60 35 21.2 ... 1183.21 27.1 28.0 5.0 0 1 65.4 Diverse 66.966041 Moderate
3 Guyana South America 2006 32.8 40.5 222.1965 5.680 93 74 25.3 ... 0.75 5.7 5.5 7.9 0 1 67.0 Tropical 87.566295 Moderate
4 Israel Middle East 2012 3.4 4.3 57.9510 2.890 97 89 27.0 ... 7.91 1.2 1.1 12.8 1 0 81.7 Dry 100.953959 Hazardous
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2859 Niger Africa 2000 97.0 224.9 291.8240 0.092 72 64 20.8 ... 11.33 12.8 12.9 1.1 0 1 49.9 Tropical 104.715199 Unhealthy
2860 Mongolia Asia 2009 23.9 28.6 235.2330 6.560 97 97 25.3 ... 2.67 2.2 2.3 9.1 0 1 66.9 Dry 34.276856 Good
2861 Sri Lanka Asia 2004 17.7 28.9 134.8950 1.560 62 95 21.9 ... 19.39 15.4 15.5 10.3 0 1 74.3 Tropical 90.565837 Moderate
2862 Lithuania European Union 2002 7.9 9.9 204.0120 11.000 94 95 26.1 ... 3.44 3.3 3.3 11.1 1 0 71.8 Temperate 102.015136 Unhealthy
2863 Iceland Rest of Europe 2011 2.1 2.6 50.5745 6.840 88 90 26.1 ... 0.32 0.9 0.9 11.0 1 0 82.4 Temperate 82.321342 Moderate

2864 rows × 24 columns

In [5]:
data = df[df['Year'] >= 2013]

# Selecting relevant columns
columns = ['Country', 'Year', 'Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 
           'Polio', 'Diphtheria', 'Climate_Zone', 'Economy_status_Developing']
data = data[columns]

# Remove NaN values
data.dropna(inplace=True)

data.head()
Out[5]:
Country Year Under_five_deaths GDP_per_capita Hepatitis_B Polio Diphtheria Climate_Zone Economy_status_Developing
0 Turkiye 2015 13.0 11006 97 97 97 Temperate 1
1 Spain 2015 3.3 25742 97 97 97 Mediterranean 0
6 Russian Federation 2015 8.2 9313 97 97 97 Diverse 1
16 Finland 2013 2.7 43045 88 98 98 Temperate 0
25 Belize 2013 16.9 4667 95 95 95 Tropical 1
In [6]:
def remove_outliers(df, column_list):
    for column in column_list:
        Q1 = df[column].quantile(0.25)
        Q3 = df[column].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

# Removing outliers from the relevant columns
outlier_columns = ['Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria']
data = remove_outliers(data, outlier_columns)
In [8]:
data
Out[8]:
Country Year Under_five_deaths GDP_per_capita Hepatitis_B Polio Diphtheria Climate_Zone Economy_status_Developing
0 Turkiye 2015 13.0 11006 97 97 97 Temperate 1
1 Spain 2015 3.3 25742 97 97 97 Mediterranean 0
6 Russian Federation 2015 8.2 9313 97 97 97 Diverse 1
25 Belize 2013 16.9 4667 95 95 95 Tropical 1
27 Cameroon 2015 88.0 1383 84 77 84 Tropical 1
... ... ... ... ... ... ... ... ... ...
2840 Djibouti 2015 65.8 2653 84 84 84 Desert 1
2843 Kiribati 2014 59.2 1417 75 79 75 Tropical 1
2846 Nicaragua 2015 19.4 2050 98 99 98 Tropical 1
2848 Bahrain 2015 7.6 22634 98 98 98 Desert 1
2854 Fiji 2013 23.7 4902 99 99 99 Tropical 1

404 rows × 9 columns

In [9]:
data.describe()
Out[9]:
Year Under_five_deaths GDP_per_capita Hepatitis_B Polio Diphtheria Economy_status_Developing
count 404.000000 404.000000 404.000000 404.000000 404.000000 404.000000 404.000000
mean 2013.995050 30.437871 7229.750000 91.564356 91.349010 91.732673 0.873762
std 0.818505 26.519279 7138.525863 7.273346 8.013294 7.352373 0.332529
min 2013.000000 2.600000 306.000000 70.000000 65.000000 72.000000 0.000000
25% 2013.000000 10.350000 1727.500000 88.000000 87.000000 88.000000 1.000000
50% 2014.000000 19.300000 4833.500000 94.000000 94.000000 94.000000 1.000000
75% 2015.000000 49.425000 10095.750000 97.000000 98.000000 98.000000 1.000000
max 2015.000000 108.300000 32136.000000 99.000000 99.000000 99.000000 1.000000
In [13]:
# Normalizing the data
normalization_columns = ['Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria']
scaler = MinMaxScaler()
data[normalization_columns] = scaler.fit_transform(data[normalization_columns])
# Display the normalized data
print("Normalized Data:")
print(data[normalization_columns].head())
Normalized Data:
    Under_five_deaths  GDP_per_capita  Hepatitis_B     Polio  Diphtheria
0            0.098392        0.336161     0.931034  0.941176    0.925926
1            0.006623        0.799120     0.931034  0.941176    0.925926
6            0.052980        0.282972     0.931034  0.941176    0.925926
25           0.135289        0.137009     0.862069  0.882353    0.851852
27           0.807947        0.033836     0.482759  0.352941    0.444444
In [14]:
import matplotlib.pyplot as plt
import seaborn as sns

# Assuming 'data' is your DataFrame and 'normalization_columns' contains the columns you normalized

# Creating histograms for each normalized column
for column in normalization_columns:
    plt.figure(figsize=(8, 4))
    sns.histplot(data[column], bins=20, kde=True)
    plt.title(f'Histogram of Normalized {column}')
    plt.xlabel(column)
    plt.ylabel('Frequency')
    plt.show()
In [15]:
import scipy.stats as stats

# Creating Q-Q plots for each normalized column
for column in normalization_columns:
    plt.figure(figsize=(6, 6))
    stats.probplot(data[column], dist="norm", plot=plt)
    plt.title(f'Q-Q Plot of Normalized {column}')
    plt.show()

the null hypothesis = that the data is normally distributed Alternate hypothesis= the data is not normally distributed

In [17]:
import pandas as pd
from scipy.stats import shapiro

# Assuming 'data' is your DataFrame with normalized data

# List of columns to test for normality
columns_to_test = ['Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria']

# Performing Shapiro-Wilk test on each column
for column in columns_to_test:
    stat, p = shapiro(data[column])
    print(f'Normality test for {column}: Statistics={stat:.3f}, p={p:.3f}')

    # Interpretation
    alpha = 0.05
    if p > alpha:
        print(f'  {column} looks Gaussian (fail to reject H0)')
    else:
        print(f'  {column} does not look Gaussian (reject H0)')
Normality test for Under_five_deaths: Statistics=0.851, p=0.000
  Under_five_deaths does not look Gaussian (reject H0)
Normality test for GDP_per_capita: Statistics=0.828, p=0.000
  GDP_per_capita does not look Gaussian (reject H0)
Normality test for Hepatitis_B: Statistics=0.866, p=0.000
  Hepatitis_B does not look Gaussian (reject H0)
Normality test for Polio: Statistics=0.851, p=0.000
  Polio does not look Gaussian (reject H0)
Normality test for Diphtheria: Statistics=0.856, p=0.000
  Diphtheria does not look Gaussian (reject H0)

The results from your Shapiro-Wilk normality tests indicate that the data in all the tested columns ('Under_five_deaths', 'GDP_per_capita', 'Hepatitis_B', 'Polio', and 'Diphtheria') do not follow a normal distribution (Gaussian distribution). This conclusion is drawn from the fact that the p-values for all these columns are very small (0.000), leading to the rejection of the null hypothesis that the data is normally distributed.

Considering our nature of data- combination of non-parametric statistical tests and possibly some multivariate analysis would be appropriate.

  1. Spearman's Rank Correlation First, we will assess the relationship between continuous variables such as 'GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria', and 'Under_five_deaths'. Spearman's rank correlation is suitable for this as it does not assume normal distribution and is good for identifying monotonic relationships.
In [18]:
import pandas as pd
from scipy.stats import spearmanr

# Assuming 'data' is your DataFrame
columns_to_correlate = ['GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria', 'Under_five_deaths']

# Calculating Spearman's Rank Correlation
for col1 in columns_to_correlate:
    for col2 in columns_to_correlate:
        if col1 != col2:
            coef, p = spearmanr(data[col1], data[col2])
            print(f"Spearman correlation between {col1} and {col2}: Coefficient={coef:.3f}, P-value={p:.3f}")
Spearman correlation between GDP_per_capita and Hepatitis_B: Coefficient=0.340, P-value=0.000
Spearman correlation between GDP_per_capita and Polio: Coefficient=0.391, P-value=0.000
Spearman correlation between GDP_per_capita and Diphtheria: Coefficient=0.410, P-value=0.000
Spearman correlation between GDP_per_capita and Under_five_deaths: Coefficient=-0.823, P-value=0.000
Spearman correlation between Hepatitis_B and GDP_per_capita: Coefficient=0.340, P-value=0.000
Spearman correlation between Hepatitis_B and Polio: Coefficient=0.884, P-value=0.000
Spearman correlation between Hepatitis_B and Diphtheria: Coefficient=0.936, P-value=0.000
Spearman correlation between Hepatitis_B and Under_five_deaths: Coefficient=-0.427, P-value=0.000
Spearman correlation between Polio and GDP_per_capita: Coefficient=0.391, P-value=0.000
Spearman correlation between Polio and Hepatitis_B: Coefficient=0.884, P-value=0.000
Spearman correlation between Polio and Diphtheria: Coefficient=0.919, P-value=0.000
Spearman correlation between Polio and Under_five_deaths: Coefficient=-0.493, P-value=0.000
Spearman correlation between Diphtheria and GDP_per_capita: Coefficient=0.410, P-value=0.000
Spearman correlation between Diphtheria and Hepatitis_B: Coefficient=0.936, P-value=0.000
Spearman correlation between Diphtheria and Polio: Coefficient=0.919, P-value=0.000
Spearman correlation between Diphtheria and Under_five_deaths: Coefficient=-0.495, P-value=0.000
Spearman correlation between Under_five_deaths and GDP_per_capita: Coefficient=-0.823, P-value=0.000
Spearman correlation between Under_five_deaths and Hepatitis_B: Coefficient=-0.427, P-value=0.000
Spearman correlation between Under_five_deaths and Polio: Coefficient=-0.493, P-value=0.000
Spearman correlation between Under_five_deaths and Diphtheria: Coefficient=-0.495, P-value=0.000

Interpretation:

  1. Positive Correlations with GDP Per Capita: There are positive correlations between GDP per capita and immunization rates (Hepatitis B, Polio, Diphtheria), suggesting that higher economic status is generally associated with better immunization coverage.

  2. Strong Positive Correlations Among Immunization Rates: The immunization rates for Hepatitis B, Polio, and Diphtheria are highly correlated with each other, indicating that improvements in healthcare access in one area often accompany improvements in others.

  3. Negative Correlations with Under-Five Deaths: There are strong negative correlations between under-five mortality rates and both GDP per capita and immunization rates. This indicates that higher economic status and better immunization coverage are associated with lower under-five mortality rates.

In summary, these correlations suggest a significant association between economic status, healthcare access (as measured by immunization rates), and child mortality, where better economic conditions and improved healthcare access are linked to lower under-five mortality rates.

  1. Kruskal-Wallis Test Since you're interested in how under-five mortality rates vary across different climate zones, the Kruskal-Wallis test can be used. This test is the non-parametric version of ANOVA and is used when comparing more than two groups.
In [19]:
from scipy.stats import kruskal

# Assuming 'data' has a column 'Climate_Zone'
# Comparing 'Under_five_deaths' across different 'Climate_Zone'
climate_zones = data['Climate_Zone'].unique()
grouped_data = [data['Under_five_deaths'][data['Climate_Zone'] == zone] for zone in climate_zones]

stat, p = kruskal(*grouped_data)
print(f"Kruskal-Wallis Test: Statistics={stat:.3f}, p={p:.3f}")
Kruskal-Wallis Test: Statistics=152.658, p=0.000

Interpretation:¶

Statistical Significance: The very low p-value (0.000) suggests that there are statistically significant differences in under-five mortality rates among the various climate zones in your dataset.

Climate Zone Impact: This finding implies that the climate zone is a factor that differentiates under-five mortality rates. It supports the idea that the mortality rate for children under five is not uniform across different climate zones.

Post-Hoc Analysis: Since the Kruskal-Wallis test indicates that there are differences but does not specify between which climate zones these differences occur, you might consider conducting post-hoc tests. Methods like the Dunn's test can be used to compare specific pairs of climate zones to identify where the significant differences lie.

In [22]:
pip install statsmodels
Requirement already satisfied: statsmodels in c:\users\palla\anaconda3\lib\site-packages (0.13.2)
Requirement already satisfied: numpy>=1.17 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (1.21.5)
Requirement already satisfied: scipy>=1.3 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (1.9.1)
Requirement already satisfied: pandas>=0.25 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (1.4.4)
Requirement already satisfied: patsy>=0.5.2 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (0.5.2)
Requirement already satisfied: packaging>=21.3 in c:\users\palla\anaconda3\lib\site-packages (from statsmodels) (21.3)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in c:\users\palla\anaconda3\lib\site-packages (from packaging>=21.3->statsmodels) (3.0.9)
Requirement already satisfied: python-dateutil>=2.8.1 in c:\users\palla\anaconda3\lib\site-packages (from pandas>=0.25->statsmodels) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\palla\anaconda3\lib\site-packages (from pandas>=0.25->statsmodels) (2022.1)
Requirement already satisfied: six in c:\users\palla\anaconda3\lib\site-packages (from patsy>=0.5.2->statsmodels) (1.16.0)
Note: you may need to restart the kernel to use updated packages.
In [23]:
import pandas as pd
from scipy.stats import kruskal
from statsmodels.stats.multicomp import pairwise_tukeyhsd, MultiComparison

# Assuming 'data' is your DataFrame and has columns 'Climate_Zone' and 'Under_five_deaths'

# Conducting Kruskal-Wallis Test
climate_zones = data['Climate_Zone'].unique()
grouped_data = [data['Under_five_deaths'][data['Climate_Zone'] == zone] for zone in climate_zones]
stat, p = kruskal(*grouped_data)
print(f"Kruskal-Wallis Test: Statistics={stat:.3f}, p={p:.3f}")

# Conducting Dunn's Post-Hoc Test
mc = MultiComparison(data['Under_five_deaths'], data['Climate_Zone'])
result = mc.tukeyhsd()

print(result)
print(mc.groupsunique)
Kruskal-Wallis Test: Statistics=152.658, p=0.000
       Multiple Comparison of Means - Tukey HSD, FWER=0.05        
==================================================================
    group1        group2    meandiff p-adj   lower   upper  reject
------------------------------------------------------------------
       Desert       Diverse  -0.0171 0.9999 -0.2331  0.1989  False
       Desert           Dry   0.0833 0.5763 -0.0627  0.2294  False
       Desert Mediterranean  -0.2193 0.0059 -0.3967  -0.042   True
       Desert     Temperate  -0.1319 0.0397 -0.2602 -0.0037   True
       Desert      Tropical   0.1032 0.1073 -0.0118  0.2182  False
      Diverse           Dry   0.1004 0.7584 -0.1131  0.3139  False
      Diverse Mediterranean  -0.2022 0.1408 -0.4383  0.0338  False
      Diverse     Temperate  -0.1148 0.5795 -0.3166   0.087  False
      Diverse      Tropical   0.1203 0.4805 -0.0733  0.3139  False
          Dry Mediterranean  -0.3027    0.0  -0.477 -0.1283   True
          Dry     Temperate  -0.2152    0.0 -0.3393 -0.0912   True
          Dry      Tropical   0.0198 0.9956 -0.0904  0.1301  False
Mediterranean     Temperate   0.0874 0.6205 -0.0723  0.2472  False
Mediterranean      Tropical   0.3225    0.0  0.1732  0.4718   True
    Temperate      Tropical   0.2351    0.0  0.1498  0.3204   True
------------------------------------------------------------------
['Desert' 'Diverse' 'Dry' 'Mediterranean' 'Temperate' 'Tropical']

The Tukey HSD post-hoc test results following the Kruskal-Wallis test show significant differences in under-five mortality rates among various climate zones. Key findings include:

  1. Desert vs. Mediterranean and Temperate: Mortality rates are significantly lower in Mediterranean and Temperate zones compared to Desert zones.

  2. Dry vs. Mediterranean and Temperate: Similarly, Mediterranean and Temperate zones have significantly lower mortality rates than Dry zones.

  3. Mediterranean vs. Tropical: Tropical zones have significantly higher mortality rates compared to Mediterranean zones.

  4. Temperate vs. Tropical: Temperate zones have significantly lower mortality rates than Tropical zones.

Tropical Zone > Desert Zone > Dry Zone > Temperate Zone > Mediterranean Zone

Final Inference:

  1. GDP per Capita and Healthcare Access: Countries with higher GDP per capita usually have better healthcare access, including higher immunization rates for Hepatitis B, Polio, and Diphtheria.

  2. Vaccinations and Under-Five Deaths: When more children get vaccinated, the number of under-five deaths decreases. This shows that better healthcare access helps in reducing child mortality.

  3. Different Climates, Different Results: The impact of GDP per capita and healthcare access on under-five deaths varies depending on the climate zone. For instance, in Tropical climates, under-five deaths are higher compared to Mediterranean or Temperate climates.

  4. Climate's Influence: The study found that the climate zone plays a role in the number of under-five deaths in those areas.

Overall Finding: In developing countries, higher GDP per capita and better healthcare access, especially in terms of immunization, are linked to lower under-five deaths. However, the climate zone also influences these outcomes, with some climates having higher rates of under-five deaths than others.

In [25]:
###Visualization
  1. Scatter Plot for GDP per Capita vs. Immunization Rates: This plot will show the relationship between the economic status of a country and its immunization rates.
In [26]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.scatterplot(x='GDP_per_capita', y='Hepatitis_B', data=data, label='Hepatitis B')
sns.scatterplot(x='GDP_per_capita', y='Polio', data=data, label='Polio')
sns.scatterplot(x='GDP_per_capita', y='Diphtheria', data=data, label='Diphtheria')
plt.xlabel('GDP per Capita')
plt.ylabel('Immunization Rate')
plt.title('GDP per Capita vs. Immunization Rates')
plt.legend()
plt.show()
  1. Scatter Plot for GDP per Capita vs. Under-Five Deaths: This plot will illustrate how under-five mortality rates correlate with GDP per capita.
In [28]:
sns.scatterplot(x='GDP_per_capita', y='Under_five_deaths', data=data)
plt.xlabel('GDP per Capita')
plt.ylabel('Under-Five Deaths')
plt.title('GDP per Capita vs. Under-Five Deaths')
plt.show()
  1. Box Plot for Climate Zone vs. Under-Five Deaths: This plot will show the distribution of under-five deaths across different climate zones.
In [29]:
sns.boxplot(x='Climate_Zone', y='Under_five_deaths', data=data)
plt.xlabel('Climate Zone')
plt.ylabel('Under-Five Deaths')
plt.title('Under-Five Deaths in Different Climate Zones')
plt.xticks(rotation=45)
plt.show()
  1. Heatmap for Spearman Correlation Matrix: A heatmap to visualize the Spearman correlation among GDP per capita, immunization rates, and under-five deaths.
In [30]:
import numpy as np

correlation_matrix = data[['GDP_per_capita', 'Hepatitis_B', 'Polio', 'Diphtheria', 'Under_five_deaths']].corr(method='spearman')
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Spearman Correlation Matrix')
plt.show()
In [31]:
pip install geopandas matplotlib
Collecting geopandas
  Downloading geopandas-0.14.1-py3-none-any.whl (1.1 MB)
     ---------------------------------------- 1.1/1.1 MB 7.0 MB/s eta 0:00:00
Requirement already satisfied: matplotlib in c:\users\palla\anaconda3\lib\site-packages (3.5.2)
Collecting fiona>=1.8.21
  Downloading fiona-1.9.5-cp39-cp39-win_amd64.whl (22.9 MB)
     ---------------------------------------- 22.9/22.9 MB 9.3 MB/s eta 0:00:00
Requirement already satisfied: pandas>=1.4.0 in c:\users\palla\anaconda3\lib\site-packages (from geopandas) (1.4.4)
Collecting shapely>=1.8.0
  Downloading shapely-2.0.2-cp39-cp39-win_amd64.whl (1.4 MB)
     ---------------------------------------- 1.4/1.4 MB 11.4 MB/s eta 0:00:00
Requirement already satisfied: packaging in c:\users\palla\anaconda3\lib\site-packages (from geopandas) (21.3)
Collecting pyproj>=3.3.0
  Downloading pyproj-3.6.1-cp39-cp39-win_amd64.whl (6.1 MB)
     ---------------------------------------- 6.1/6.1 MB 13.9 MB/s eta 0:00:00
Requirement already satisfied: fonttools>=4.22.0 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (4.25.0)
Requirement already satisfied: python-dateutil>=2.7 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (2.8.2)
Requirement already satisfied: numpy>=1.17 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (1.21.5)
Requirement already satisfied: pyparsing>=2.2.1 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (3.0.9)
Requirement already satisfied: pillow>=6.2.0 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (9.2.0)
Requirement already satisfied: cycler>=0.10 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (0.11.0)
Requirement already satisfied: kiwisolver>=1.0.1 in c:\users\palla\anaconda3\lib\site-packages (from matplotlib) (1.4.2)
Requirement already satisfied: attrs>=19.2.0 in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (21.4.0)
Requirement already satisfied: importlib-metadata in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (4.11.3)
Requirement already satisfied: six in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (1.16.0)
Requirement already satisfied: click~=8.0 in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (8.0.4)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Requirement already satisfied: certifi in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (2022.9.14)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Requirement already satisfied: setuptools in c:\users\palla\anaconda3\lib\site-packages (from fiona>=1.8.21->geopandas) (63.4.1)
Requirement already satisfied: pytz>=2020.1 in c:\users\palla\anaconda3\lib\site-packages (from pandas>=1.4.0->geopandas) (2022.1)
Requirement already satisfied: colorama in c:\users\palla\anaconda3\lib\site-packages (from click~=8.0->fiona>=1.8.21->geopandas) (0.4.5)
Requirement already satisfied: zipp>=0.5 in c:\users\palla\anaconda3\lib\site-packages (from importlib-metadata->fiona>=1.8.21->geopandas) (3.8.0)
Installing collected packages: shapely, pyproj, cligj, click-plugins, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.9.5 geopandas-0.14.1 pyproj-3.6.1 shapely-2.0.2
Note: you may need to restart the kernel to use updated packages.
In [34]:
pip install plotly
Requirement already satisfied: plotly in c:\users\palla\anaconda3\lib\site-packages (5.9.0)
Requirement already satisfied: tenacity>=6.2.0 in c:\users\palla\anaconda3\lib\site-packages (from plotly) (8.0.1)
Note: you may need to restart the kernel to use updated packages.
In [37]:
pip install pycountry
Collecting pycountry
  Downloading pycountry-22.3.5.tar.gz (10.1 MB)
     --------------------------------------- 10.1/10.1 MB 11.4 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Requirement already satisfied: setuptools in c:\users\palla\anaconda3\lib\site-packages (from pycountry) (63.4.1)
Building wheels for collected packages: pycountry
  Building wheel for pycountry (pyproject.toml): started
  Building wheel for pycountry (pyproject.toml): finished with status 'done'
  Created wheel for pycountry: filename=pycountry-22.3.5-py2.py3-none-any.whl size=10681895 sha256=598b233dac8865bbcdfdebe84efae2d341d7a26cfe3e1554c68560856b284774
  Stored in directory: c:\users\palla\appdata\local\pip\cache\wheels\47\15\92\e6dc85fcb0686c82e1edbcfdf80cfe4808c058813fed0baa8f
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-22.3.5
Note: you may need to restart the kernel to use updated packages.
In [38]:
import pycountry
import pandas as pd

# Assuming 'data' is your DataFrame

# Function to get ISO code for a country
def get_iso_code(country):
    try:
        return pycountry.countries.get(name=country).alpha_3
    except:
        return None

# Apply the function to your country column
data['iso_alpha'] = data['Country'].apply(get_iso_code)
In [39]:
import plotly.express as px

# Now that 'iso_alpha' is added to 'data'
fig = px.choropleth(data, 
                    locations="iso_alpha", 
                    color="Climate_Zone",
                    hover_name="Country", 
                    color_continuous_scale=px.colors.sequential.Plasma)

fig.update_layout(title_text='Climate Zones by Country')
fig.show()
Climate_ZoneTemperateMediterraneanDiverseTropicalDesertDryClimate Zones by Country
plotly-logomark
In [41]:
# Calculate the average immunization rate
data['Avg_Immunization'] = data[['Hepatitis_B', 'Polio', 'Diphtheria']].mean(axis=1)
import plotly.express as px

# Assuming you have now added 'Avg_Immunization' to 'data'
fig_immunization = px.choropleth(data, 
                                 locations="iso_alpha", 
                                 color="Avg_Immunization",
                                 hover_name="Country", 
                                 color_continuous_scale=px.colors.sequential.Viridis)

fig_immunization.update_layout(title_text='Average Immunization Rates by Country')
fig_immunization.show()
0.20.40.60.81Avg_ImmunizationAverage Immunization Rates by Country
plotly-logomark
In [42]:
# Map for Under-Five Deaths
fig_deaths = px.choropleth(data, 
                           locations="iso_alpha", 
                           color="Under_five_deaths",
                           hover_name="Country", 
                           color_continuous_scale=px.colors.sequential.OrRd)
fig_deaths.update_layout(title_text='Under-Five Deaths by Country')
fig_deaths.show()
00.20.40.60.81Under_five_deathsUnder-Five Deaths by Country
plotly-logomark
In [ ]: